package com.xlongwei.light4j.handler.demo;

import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Supplier;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.xlongwei.light4j.handler.ServiceHandler.AbstractHandler;
import com.xlongwei.light4j.util.FileUtil;
import com.xlongwei.light4j.util.FileUtil.CharsetNames;
import com.xlongwei.light4j.util.HandlerUtil;
import com.xlongwei.light4j.util.JsonUtil;
import com.xlongwei.light4j.util.NumberUtil;
import com.xlongwei.light4j.util.RedisConfig;
import com.xlongwei.light4j.util.StringUtil;
import com.xlongwei.light4j.util.TaskUtil;

import cn.hutool.core.thread.ThreadUtil;
import io.undertow.server.HttpServerExchange;
import lombok.extern.slf4j.Slf4j;

@Slf4j
public class CrawlHandler extends AbstractHandler {

	/**
	 * Adds or replaces one named entry in a JSON array stored in Redis and responds with the array.
	 * <p>
	 * The array is read from key {@code crawler.crawl} when the {@code crawler} parameter is blank,
	 * otherwise from {@code crawler.crawl.<crawler>}. When the {@code data} parameter parses to a
	 * JSON object with a non-null {@code name}, every stored entry whose name matches
	 * (case-insensitively) is replaced by it; if none matches, it is appended. The array is then
	 * written back. Without a usable {@code data} object the stored array is returned unchanged.
	 *
	 * @param exchange the current request; {@code crawler} and {@code data} are read from it
	 * @throws Exception propagated from the underlying helpers
	 */
	public void config(HttpServerExchange exchange) throws Exception {
		String crawler = HandlerUtil.getParam(exchange, "crawler");
		String data = HandlerUtil.getParam(exchange, "data");
		String key = StringUtil.isBlank(crawler) ? "crawler.crawl" : "crawler.crawl." + crawler;
		JSONArray array = JsonUtil.parseArray(StringUtil.firstNotBlank(RedisConfig.get(key), "[]"));
		if (array == null) {
			// exec() null-checks the same call, so an unparsable stored value is treated as empty here
			log.warn("stored config is not a json array, key: {}", key);
			array = new JSONArray();
		}
		JSONObject config = JsonUtil.parse(data);
		// getString returns null both for a missing key and for an explicit null value
		String name = config == null ? null : config.getString("name");
		if (name != null) {
			boolean add = true;
			for (int i = 0, s = array.size(); i < s; i++) {
				JSONObject item = array.getJSONObject(i);
				if (item != null && name.equalsIgnoreCase(item.getString("name"))) {
					array.set(i, config);
					add = false;
				}
			}
			if (add) {
				array.add(config);
			}
			RedisConfig.set(key, array.toJSONString());
		}
		HandlerUtil.setResp(exchange, array);
	}

	/**
	 * Runs a configured crawler, either completely (in the background) or for one step (inline).
	 * <p>
	 * Request parameters: {@code crawler} (must name an entry of the {@code crawler.crawl} list),
	 * {@code step}, and {@code depth} (parsed as boolean, default {@code true}).
	 * <ul>
	 * <li>{@code step} blank, digits, or {@code -} followed by digits: a Redis lock on
	 * {@code crawler.crawl.<crawler>} is taken, the crawl is submitted through {@code TaskUtil} and
	 * the response is the crawler config with its {@code steps} attached. The parsed number
	 * (default 1) is passed on as the width; a value {@code -N} within the number of steps also
	 * makes the crawl start at level N. {@code depth} selects depth-first or breadth-first.</li>
	 * <li>any other {@code step}: looked up by name/level in the steps; the step's url is fetched
	 * (or taken from the HTML cache), evaluated, its {@code data} saved, and the evaluation result
	 * is the response.</li>
	 * </ul>
	 * The method returns without setting a response when the crawler or step is unknown, no steps
	 * are configured, the lock cannot be taken, or the page cannot be fetched.
	 *
	 * @param exchange the current request
	 * @throws Exception propagated from the underlying helpers
	 */
	public void exec(HttpServerExchange exchange) throws Exception {
		String crawler = HandlerUtil.getParam(exchange, "crawler");
		String step = HandlerUtil.getParam(exchange, "step");
		boolean depth = NumberUtil.parseBoolean(HandlerUtil.getParam(exchange, "depth"), true);
		log.info("exec crawler: {}, step: {}", crawler, step);
		if(StringUtil.isBlank(crawler)) {
			return;
		}
		JSONArray crawls = JsonUtil.parseArray(StringUtil.firstNotBlank(RedisConfig.get("crawler.crawl"), "[]"));
		if(crawls==null) {
			return;
		}
		JSONObject config = findInArray(crawls, crawler);
		if(config==null) {
			return;
		}

		final JSONArray steps = JsonUtil.parseArray(StringUtil.firstNotBlank(RedisConfig.get("crawler.crawl."+crawler), "[]"));
		if(steps==null || steps.isEmpty()) {
			return;
		}
		if(StringUtil.isBlank(step) || StringUtil.isNumbers(step) || (step.startsWith("-") && StringUtil.isNumbers(step.substring(1)))) {
			// NOTE(review): the timeout key ends with the literal "crawler", not the crawler name — confirm this is intended
			boolean lock = RedisConfig.lock(RedisConfig.LOCK, "crawler.crawl."+crawler, NumberUtil.parseInt(RedisConfig.get("crawler.crawl.millis.crawler"), 30000) / 1000, 4);
			if(!lock) {
				log.info("fail to lock crawler {}", crawler);
				return;
			}
			TaskUtil.submit(new Runnable() {
				@Override
				public void run() {
					int level = 1, width = NumberUtil.parseInt(step, 1);
					if(-steps.size() <= width && width <=-1) {
						level = -width; // step=-2 starts the crawl at level 2
					}
					if(depth) {
						try {
							stats.get().start(level, 1);
							depthFirst(crawler, steps, level, width);
							log.warn("stats: {}", stats.get().stats(steps.size()));
						}finally {
							// always clear, otherwise a failed crawl leaves its counters on this worker thread
							stats.remove();
						}
					}else {
						breadthFirst(crawler, steps, level, width);
					}
					log.warn("crawl {}.{} finished", crawler, level);
				}
			});
			config.put("steps", steps);
			HandlerUtil.setResp(exchange, config);
		}else {
			JSONObject stepConfig = findInArray(steps, step);
			if(stepConfig==null) {
				log.info("step {} not found for crawler {}", step, crawler);
				return;
			}
			String url = stepConfig.getString("url");
			if(StringUtil.isUrl(url)) {
				String html = RedisConfig.get(RedisConfig.HTML, url);
				if(StringUtil.isBlank(html)) {
					html = get(url);
					if(StringUtil.isBlank(html)) {
						// same policy as runStep: never cache or evaluate an empty page
						log.info("fail to get html, url: {}", url);
						return;
					}
					RedisConfig.set(RedisConfig.HTML, url, html);
				}
				String resp = cheerioEval(crawler, step, url);
				JSONObject json = JsonUtil.parseNew(resp);
				saveData(json.get("data"));
				HandlerUtil.setResp(exchange, json);
			}
		}
	}

	/**
	 * Crawls level by level: all urls of one level are processed before moving to the next.
	 * <p>
	 * The first step whose {@code level} equals the current level is used. On the starting level
	 * the step's own {@code url} seeds the url list; on later levels the urls returned by the
	 * previous level are used. The crawl stops when no step with a {@code name} exists for the
	 * level or a level yields no urls.
	 *
	 * @param crawler crawler name, used for logging and passed to {@code runStep}
	 * @param steps   step configs of the crawler
	 * @param level   level to start at
	 * @param width   when between 1 and the number of found urls (exclusive), only the first
	 *                {@code width} urls are carried to the next level; otherwise all are kept
	 */
	private void breadthFirst(String crawler, JSONArray steps, int level, int width) {
		List<String> urls = new LinkedList<>();
		boolean seeded = false;
		while(true) {
			JSONObject stepConfig = null;
			for(int j=0;j<steps.size();j++) {
				JSONObject config = steps.getJSONObject(j);
				if(config!=null && level==NumberUtil.parseInt(config.getString("level"), 0)) {
					stepConfig = config;
					break;
				}
			}
			if(stepConfig==null || !stepConfig.containsKey("name")) {
				break;
			}
			if(!seeded) {
				// seed from the starting level's url, whichever level that is, so a start at level 2+ has something to crawl
				seeded = true;
				String firstUrl = stepConfig.getString("url");
				if(!StringUtil.isBlank(firstUrl)) {
					urls.add(firstUrl);
				}
			}
			log.info("crawl {}.{} width={} {} urls {}", crawler, level, width, urls.size(), urls);
			List<String> runStep = runStep(crawler, stepConfig.getString("name"), urls);
			if(runStep.isEmpty()) {
				break;
			}
			if(width<1 || width>=runStep.size()) {
				urls = runStep;
			}else {
				log.info("crawl {}.{} width={} runStep={} filterd={}", crawler, level, width, runStep.size(), width);
				urls = new LinkedList<>(runStep.subList(0, width));
			}
			level++;
		}
	}
	
	/**
	 * Crawl counters of the current thread, one (crawled, total) pair per level;
	 * each thread lazily gets its own fresh {@link LevelProgress}.
	 */
	private ThreadLocal<LevelProgress> stats = ThreadLocal.withInitial(LevelProgress::new);
	
	/**
	 * Per-level crawl counters, indexed by level number.
	 * <p>
	 * For each level it tracks the urls processed in the current batch ({@code progress}), the
	 * size of the current batch ({@code levelTotal}) and the sum of all batch sizes seen so far
	 * ({@code crawlTotal}). The backing arrays grow on demand, so any non-negative level is valid.
	 */
	static class LevelProgress {
		int levels = 10;
		AtomicInteger[] progress = new AtomicInteger[levels];
		AtomicInteger[] levelTotal = new AtomicInteger[levels];
		AtomicInteger[] crawlTotal = new AtomicInteger[levels];

		/**
		 * Begins a new batch on {@code level}: resets its progress to 0, sets the batch size to
		 * {@code total} and adds {@code total} to the level's cumulative total.
		 */
		public void start(int level, int total) {
			ensureCapacity(level);
			progress[level] = new AtomicInteger(0);
			if(levelTotal[level] == null) {
				levelTotal[level] = new AtomicInteger(total);
			}else {
				levelTotal[level].set(total);
			}
			if(crawlTotal[level] == null) {
				crawlTotal[level] = new AtomicInteger(total);
			}else {
				crawlTotal[level].addAndGet(total);
			}
		}

		/** Counts one processed url on {@code level}; ignored when the level was never started. */
		public void progress(int level) {
			if(level >= levels || progress[level] == null) {
				return;
			}
			progress[level].incrementAndGet();
		}

		/**
		 * Formats the counters from {@code level} down to level 1 as
		 * {@code " level<i>=<progress>/<batch total>/<cumulative total>"}; levels that were never
		 * started are skipped.
		 */
		public String stats(int level) {
			StringBuilder sb = new StringBuilder();
			for(int i=Math.min(level, levels-1);i>0;i--) {
				if(levelTotal[i] == null) {
					// skip instead of stopping, so an unreached deepest level does not hide the shallower ones
					continue;
				}
				sb.append(" level").append(i).append("=").append(progress[i].get()).append("/").append(levelTotal[i].get()).append("/").append(crawlTotal[i].get());
			}
			return sb.toString();
		}

		/** Grows the three arrays so that index {@code level} is addressable. */
		private void ensureCapacity(int level) {
			if(level < levels) {
				return;
			}
			int size = Math.max(level + 1, levels * 2);
			progress = Arrays.copyOf(progress, size);
			levelTotal = Arrays.copyOf(levelTotal, size);
			crawlTotal = Arrays.copyOf(crawlTotal, size);
			levels = size;
		}
	}
	
	/**
	 * Crawls depth-first: runs the step of {@code level} on its configured url, then for every
	 * url it returns, writes that url into the next level's step config and recurses.
	 * <p>
	 * Progress is recorded in the thread-local {@code stats}. The recursion ends when the level
	 * has no step, the step has no name or valid url, the step yields no urls, or there is no
	 * step configured for the next level.
	 *
	 * @param crawler crawler name, used for logging and passed to {@code runStep}
	 * @param steps   step configs of the crawler; the {@code url} of deeper steps is overwritten
	 * @param level   level to crawl
	 * @param width   when positive and smaller than the number of found urls, only the first
	 *                {@code width} urls are followed
	 */
	private void depthFirst(String crawler, JSONArray steps, int level, int width) {
		JSONObject stepConfig = findInArray(steps, String.valueOf(level));
		if(stepConfig == null) {
			return;
		}
		String name = stepConfig.getString("name"), url = stepConfig.getString("url");
		log.info("crawl {}.{} width={} url={}", crawler, level, width, url);
		if(StringUtil.isBlank(name) || !StringUtil.isUrl(url)) {
			return;
		}
		List<String> runStep = runStep(crawler, name, Arrays.asList(url));
		stats.get().progress(level);
		if(width>0 && width<runStep.size()) {
			log.info("crawl {}.{} width={} runStep={} filterd={}", crawler, level, width, runStep.size(), width);
			runStep = new LinkedList<>(runStep.subList(0, width));
		}
		if(runStep.isEmpty()) {
			return;
		}
		int nextLevel = level + 1; // descend to the next level
		JSONObject nextConfig = findInArray(steps, String.valueOf(nextLevel));
		if(nextConfig == null) {
			// the deepest configured step returned urls, but nothing is configured to follow them
			log.info("crawl {}.{} has no next step, {} urls ignored", crawler, level, runStep.size());
			return;
		}
		stats.get().start(nextLevel, runStep.size());
		for(String str : runStep) {
			nextConfig.put("url", str);
			depthFirst(crawler, steps, nextLevel, width);
			log.info("stats: {}", stats.get().stats(nextLevel));
		}
		log.warn("stats: {}", stats.get().stats(nextLevel));
	}
	
	/**
	 * Returns the first object in {@code array} whose {@code name} or {@code level} equals
	 * {@code name}.
	 *
	 * @param array configs to search; may be {@code null}
	 * @param name  value to match against each entry's {@code name} and {@code level}; may be {@code null}
	 * @return the matching entry, or {@code null} when there is none or an argument is {@code null}
	 */
	private JSONObject findInArray(JSONArray array, String name) {
		if(array == null || name == null) {
			return null;
		}
		for(int i=0;i<array.size();i++) {
			JSONObject config = array.getJSONObject(i);
			if(config == null) {
				continue;
			}
			if(name.equals(config.getString("name"))
					|| name.equals(config.getString("level"))) {
				return config;
			}
		}
		return null;
	}

	/**
	 * Runs one step on each url and collects the urls the step returns.
	 * <p>
	 * For every url the page HTML is taken from the Redis HTML cache or, when missing, fetched
	 * after a configurable pause ({@code crawler.crawl.sleep}, default 50) and cached. Urls whose
	 * page cannot be fetched are skipped. The page is then evaluated via {@code cheerioEval}; the
	 * result's {@code urls} are collected and its {@code data} is saved.
	 *
	 * @param crawler crawler name
	 * @param step    step name
	 * @param urls    urls to process
	 * @return the urls returned by all evaluations, in order; never {@code null}
	 */
	private List<String> runStep(String crawler, String step, List<String> urls) {
		List<String> nextUrls = new LinkedList<>();
		for(String url : urls) {
			log.info("crawl {}.{} url {}", crawler, step, url);
			String html = RedisConfig.get(RedisConfig.HTML, url);
			if(StringUtil.isBlank(html)) {
				log.info("get url: {}", url);
				ThreadUtil.safeSleep(NumberUtil.parseInt(RedisConfig.get("crawler.crawl.sleep"), 50));
				html = get(url);
				if(StringUtil.isBlank(html)) {
					log.info("fail to get html, url: {}", url);
					continue;
				}
				RedisConfig.set(RedisConfig.HTML, url, html);
			}
			String resp = cheerioEval(crawler, step, url);
			JSONObject json = JsonUtil.parseNew(resp);
			JSONArray urlsArray = json.getJSONArray("urls");
			int urlsSize = urlsArray==null ? 0 : urlsArray.size();
			if(urlsSize > 0) {
				nextUrls.addAll(urlsArray.toJavaList(String.class));
			}
			// log this page's urls so the list matches the count, not the running total
			log.info("url={} has {} urls: {}", url, urlsSize, urlsArray);
			saveData(json.get("data"));
		}
		log.info("crawl {}.{} get {} urls", crawler, step, nextUrls.size());
		return nextUrls;
	}
	
	/**
	 * Asks the crawl service to evaluate a step against a page and returns the raw response.
	 * <p>
	 * The request goes to {@code <service>/crawl.json}, where the service base is read from
	 * {@code crawler.crawl.ourjs} (default {@code http://localhost:8055}). It passes
	 * {@code url=html:<page url>} and {@code step=property:crawler.crawl.<crawler>.<step>}.
	 *
	 * @param crawler crawler name
	 * @param step    step name
	 * @param url     page url, escaped before being embedded in the query string
	 * @return the service response body as returned by {@code get}
	 */
	private String cheerioEval(String crawler, String step, String url) {
		String stepKey = "crawler.crawl."+crawler+"."+step;
		String service = StringUtil.firstNotBlank(RedisConfig.get("crawler.crawl.ourjs"), "http://localhost:8055")+"/crawl.json";
		// '%' goes first so existing percent-escapes in the page url survive the service's decoding
		// unchanged; '#' and '+' would otherwise be cut off or read as a space
		String escapedUrl = url.replace("%", "%25")
				.replace("?", "%3F").replace("=", "%3D").replace("&", "%26")
				.replace("#", "%23").replace("+", "%2B");
		String htmlUrl = service+"?url=html:"+escapedUrl+"&step=property:"+stepKey;
		return get(htmlUrl);
	}
	
	/**
	 * Reads the content behind {@code url} as a UTF-8 string, using {@code FileUtil.stream} and
	 * {@code FileUtil.readString}.
	 *
	 * @param url address to read
	 * @return the content; callers in this class treat a blank result as a failed fetch
	 */
	private String get(String url) {
		return FileUtil.readString(FileUtil.stream(url), CharsetNames.UTF_8);
	}
	
	/**
	 * Logs the crawl result and stores it: a JSON object is saved directly, a JSON array is saved
	 * element by element, and anything else (including {@code null}) is ignored.
	 *
	 * @param data the {@code data} value of an evaluation result
	 */
	private void saveData(Object data) {
		log.info("save crawl data: {}", data);
		if(data instanceof JSONArray) {
			JSONArray items = (JSONArray)data;
			for(int idx=0;idx<items.size();idx++) {
				saveData(items.getJSONObject(idx));
			}
			return;
		}
		if(data instanceof JSONObject) {
			saveData((JSONObject)data);
		}
	}
	
	/**
	 * Stores each object-valued entry of {@code item} in the Redis cache, driven by its key.
	 * <ul>
	 * <li>{@code hset_<a>_<b>}: writes the value's JSON as field {@code b} of hash {@code a}.</li>
	 * <li>{@code hsetnx_<a>_<b>}: same, but only when the field is currently blank.</li>
	 * </ul>
	 * Only the first two {@code _}-separated segments after the prefix are used. Other keys are
	 * logged as unsupported. Saving is best-effort: an entry that fails (for example because its
	 * value is not a JSON object) is logged and skipped, and the remaining entries are processed.
	 *
	 * @param item one crawl result object; {@code null} or empty is ignored
	 */
	private void saveData(JSONObject item) {
		if(item==null || item.size()==0) {
			return;
		}
		for(String key : item.keySet()) {
			try {
				JSONObject obj = item.getJSONObject(key);
				if(obj == null) {
					continue;
				}
				String saveKey = StringUtil.getPatternString(key, "^hset_(.+_.+)$");
				if(StringUtil.hasLength(saveKey)) {
					String[] split = saveKey.split("_");
					RedisConfig.hset(RedisConfig.CACHE, split[0], split[1], obj.toJSONString());
					log.info("hset {} {} {}", split[0], split[1], obj);
					continue;
				}
				saveKey = StringUtil.getPatternString(key, "^hsetnx_(.+_.+)$");
				if(StringUtil.hasLength(saveKey)) {
					String[] split = saveKey.split("_");
					if(StringUtil.isBlank(RedisConfig.hget(RedisConfig.CACHE, split[0], split[1]))){
						RedisConfig.hset(RedisConfig.CACHE, split[0], split[1], obj.toJSONString());
						log.info("hset {} {} {}", split[0], split[1], obj);
					}
					continue;
				}
				log.info("unsupported key: {}, data: {}", key, obj);
			}catch(Exception e) {
				// still best-effort, but no longer silent; info level because plain (non-object) fields land here routinely
				log.info("skip key: {}, error: {}", key, e.getMessage());
			}
		}
	}
	
}
